/********************** DESCRIPTION***********************************
This code reproduces the main tables and figures in the paper 
"Reverting to Informality: Unregistered Property Transactions 
and the Erosion of the Titling Reform in Peru"
by Gutierrez and Molina 
**********************************************************************/

* Start from an empty session and switch off the -more- pause in the output
clear
set more off

* Root folder that holds the input data and receives the log, tables and figures
global results "C:\GutierrezMolina"

* Close any log that is still open (no error if there is none), then open a fresh one
capture log close
log using "$results\MainTablesFigures.smcl", replace

* Load the analysis dataset, replacing whatever is in memory
use "$results/analysis_data", clear

************************************************
*Table 2. Sample Means *******************
************************************************

*Construct controls for the household head
* Keep only the rows flagged with head==1
keep if head==1
* Sequential row identifier; it is the i() key of the reshape further below
ge i=_n

* NOTE(review): assumes sex_head==1 codes women - confirm against the codebook.
* Rows with missing sex_head get female==0, because the comparison is false.
gen female=sex_head==1

* Education categories:
*   1 = level 0 or missing, 2 = levels 1-4, 3 = level 5, 4 = levels 6-7
recode educlevel_head (0=1) (.=1) (1/4=2) (5=3) (6/7=4), gen(edu_cat) 

gen primaryless				= edu_cat==2
gen secondary				= edu_cat==3
gen postsecondary			= edu_cat==4
* Rebuild the missing-education flag from edu_cat. The drop is guarded with
* capture, as the other drops in this file are, so the script does not abort
* when the variable is not present in the input data.
capture drop educlevel_head_missing
gen educlevel_head_missing  = edu_cat==1


* Rows with missing civil_status get married==0
gen married=civil_status==1

* Covariate groups: lot, household head, household, and area characteristics
global lotX "invasion lot_size time_habita" 
global headX "age_head female  educlevel_head_missing married"
global HHX 	"hh_size lincome_family_pc"
global areaX 	"dens_pob coast altitud idh alfabetismo"
global X "$lotX $headX $HHX $areaX "

*Construct indicators for property transactions carried out between 2000 and 2010
* Note: a transaction is called "seg_acto" in the data, for secondary transactions (i.e. after titling or purchasing)
* Each indicator is 1 when the transaction type was reported (seg_actoN==1) and its
* year lies in 2000-2010; a missing year fails the range test and yields 0.

* Types 2 and 7
gen construction_10 = (seg_acto2==1 & inrange(seg_acto_year2, 2000, 2010)) | ///
                      (seg_acto7==1 & inrange(seg_acto_year7, 2000, 2010))

* Type 3
gen partialsell_10 = (seg_acto3==1 & inrange(seg_acto_year3, 2000, 2010))

* Type 8
gen mortgage_10 = (seg_acto8==1 & inrange(seg_acto_year8, 2000, 2010))

* Types 4, 5 and 6
gen division_10 = (seg_acto4==1 & inrange(seg_acto_year4, 2000, 2010)) | ///
                  (seg_acto5==1 & inrange(seg_acto_year5, 2000, 2010)) | ///
                  (seg_acto6==1 & inrange(seg_acto_year6, 2000, 2010))

* Type 9
gen others_10 = (seg_acto9==1 & inrange(seg_acto_year9, 2000, 2010))

* Number of transaction categories with at least one event, and an any-transaction flag
egen segacto_ever_10 = rowtotal(construction_10 partialsell_10 mortgage_10 division_10 others_10)
gen any_segacto_ever_10 = segacto_ever_10>0


* Variables reported in Table 2, in row order. This replaces the earlier
* contents of global X.
global X age_head age_head_missing female primaryless secondary postsecondary educlevel_head_missing married
global X $X hh_size income_family_pc
global X $X invasion lot_size time_habita
global X $X coast altitud idh alfabetismo
global X $X any_segacto_ever_10 construction_10 partialsell_10 mortgage_10 division_10 others_10

* One matrix row per variable; columns hold the two group means, their
* difference and the t-test p-value, all rounded to two decimals
global r : word count $X
matrix M = J($r, 4, .)

forvalues row = 1/$r {
    local v : word `row' of $X
    * r(mu_1) and r(mu_2) are the means of the first and second by-group of area1
    quietly ttest `v', by(area1)
    matrix M[`row',1] = round(r(mu_1), 0.01)
    matrix M[`row',2] = round(r(mu_2), 0.01)
    matrix M[`row',3] = round(r(mu_1)-r(mu_2), 0.01)
    matrix M[`row',4] = round(r(p), 0.01)
}
* Row counter global, left one past the last filled row
global c = $r + 1

* Row names are the variable names themselves, in the same order as the rows.
* The column labels presume area1==0 is the control group, as in the
* Figure 1 variable labels of this file.
matrix rownames M = $X
matrix colnames M = Control Treated Diff Pvalue

matrix list M


***********************************************************************************************
*Reshape the data to be at the potential transaction level instead of at the household level 
**********************************************************************************************

* One row per household (i) and potential transaction type (type)
reshape long seg_acto seg_acto_year registro registro_year, i(i) j(type)

*Define each transaction type
* Group the raw type codes into five strata; codes not listed stay missing
generate strata = .
replace strata = 1 if inlist(type, 2, 7)
replace strata = 2 if type == 3
replace strata = 3 if type == 8
replace strata = 4 if inlist(type, 4, 5, 6)
replace strata = 5 if type == 9

label define strata 1 "Construction" 2 "Partial Sell" 3 "Mortgage" 4 "Division/Independization/Early inheritance" 5 "Other"
label values strata strata
* Indicator variables named with the stratum prefix, one per category
tabulate strata, generate(stratum)

*Keep only transactions dated 2000-2010, plus rows with no transaction year
keep if inrange(seg_acto_year, 2000, 2010) | seg_acto_year==.

save "$results\tempdata.dta", replace

****************************************************************
*Figure 2. Kaplan-Meier failure function 
****************************************************************

*Keep only rows where the transaction actually took place
keep if seg_acto==1
* Identifier of each retained transaction
generate id_new = _n

* Define duration as the time passed until registration (i.e., the survival time)
* Unregistered transactions are followed up to 2010; registered ones up to
* their registration year. Rows matching neither case keep a missing duration.
generate duration = .
replace duration = 2010 - seg_acto_year if registro==0
replace duration = registro_year - seg_acto_year if registro==1
* Negative durations (registration dated before the transaction) are discarded
drop if duration<0
* Shift by one so that the year of the transaction itself is spell year 1
replace duration = duration + 1

*Dummy for registering in the first year (missing when duration is missing)
capture drop firstyear
generate firstyear = cond(duration==., ., duration==1 & registro==1)

*If the transaction is registered, it most likely happened in the first year:
* NOTE(review): contents() is the table syntax from before Stata 17 - confirm
* the Stata version this file is meant to run under.
table strata, contents(count id_new mean registro mean duration mean firstyear)
* The bare condition is true for any nonzero value of registro, missing included
table strata if registro, contents(count id_new mean registro mean duration mean firstyear)

* Distribution of the number of years until registration, for registered
* transactions dated 2000-2010. The bars are drawn on a percent scale so that
* the axis agrees with its "Percent" title; with the fraction scale the axis
* showed shares between 0 and 1 under that title.
histogram duration if registro==1 & seg_acto_year>=2000 & seg_acto_year<=2010, ///
    discrete width(1) percent ytitle(Percent) xtitle(Number of years) ///
    title(Number of years until transaction registration) ///
    subtitle((Conditional on registration)) ///
    caption(Source: Authors calculations) ///
    scheme(s1color) name(HistogramDuration, replace)

*Figure 2 (included in the paper)
* Survival-time setup: duration is the analysis time, registration is the failure event
stset duration, failure(registro==1)

* Failure function (cumulative probability of registration) for
* transactions dated 2000-2010
sts graph if seg_acto_year>=2000 & seg_acto_year<=2010, ///
    failure ytitle(Probability of registration) ylabel(0(0.03)0.12) ///
    xtitle(Years after transaction) xlabel(0(2)10) title("") scheme(s1mono)

* Export the figure in the three formats, in the order pdf, png, eps
foreach fmt in pdf png eps {
    graph export "$results\Figure2.`fmt'", as(`fmt') replace
}

***********************************************************
******* Preparing the data for econometric analysis  ******
***********************************************************
 
*We expand the data to one row per transaction and spell year, which allows
*capturing the effect of the elimination of the RPU
* NOTE(review): expand treats a missing count as 1, so a transaction whose
* duration is missing stays as a single row - confirm this is intended.
expand duration
* Spell-year counter within transaction. The copies made by expand are
* identical at this point, so their order within id_new is irrelevant.
bysort id_new: ge seqvar = _n
lab var seqvar "spell year identifier, by hh"

* Spell year capped at 6, plus one dummy per value
gen time=seqvar
replace time=6 if seqvar>=6 & seqvar!=.
tab time, gen(d)

*Generate variable registry that records the moment of registration
* Rows are ordered by seqvar within id_new explicitly, so that _N is the last
* spell year whatever the current sort order of the data is. The original
* relied on the order left behind by the earlier bysort.
bysort id_new (seqvar): ge registry=registro==1 & _n==_N
lab var registry "binary depvar for discrete hazard model"


*Define time-varying covariates 
* Calendar year of each spell row: transaction year for spell year 1, then +1 per row
gen year = seg_acto_year + seqvar - 1

*Define the period "after" the policy change (2004 onwards)
gen after_t = year>=2004

* Calendar year measured relative to 2003
gen year2 = year - 2003

*Define treatment: Treated Area (area1) in the "after" period
gen treated_t = area1*after_t

** Creating dummies for each year (ano1, ano2, ...)
tabulate year, generate(ano)
* anocofop is not one of the dummies created above.
* NOTE(review): presumably dropped so that the ano prefix only matches the
* year dummies - confirm.
drop anocofop

* Flag rows whose transaction is dated 2000-2010
capture drop filter
gen filter = inrange(seg_acto_year, 2000, 2010)

*Gen district IDs
* String copies of the two numeric codes
foreach v in distrito a3 {
    tostring `v', gen(`v'_string)
}

* Codes that come out five characters long get a leading zero
foreach v in distrito a3 {
    gen l_`v' = length(`v'_string)
    replace `v'_string = "0" + `v'_string if l_`v'==5
}

* The ID comes from distrito where area1==1 and from a3 where area1==0
gen IDDIST = distrito_string if area1==1
replace IDDIST = a3_string if area1==0

************************************************************************
*Figure 1.  Average probability of registration during the first year 
************************************************************************
 
* Mean of registry in the first spell year, by calendar year and area
table year area1 if time==1, contents(mean registry)

* Cell mean of registry by year, area and spell year, plus a marker for one
* row per cell so that each point is plotted once
bysort year area1 time: egen meanprobreg = mean(registry)
bysort year area1 time: gen firstobs = _n==1

* Separate series for the two areas
gen Tmeanprobreg = meanprobreg if area1==1
label var Tmeanprobreg "Treated group"
gen Cmeanprobreg = meanprobreg if area1==0
label var Cmeanprobreg "Control group"

* Connected lines for 2000-2010, first spell year only, with a vertical line at 2004
twoway ///
    (connected Cmeanprobreg year if firstobs==1 & area1==0 & year>=2000 & year<=2010 & time==1, sort lcolor(gray) lpattern(dash) mcolor(gray)) ///
    (connected Tmeanprobreg year if firstobs==1 & area1==1 & year>=2000 & year<=2010 & time==1, sort lcolor(black) lpattern(solid) mcolor(black)), ///
    ytitle(Probability of registration) xtitle(Year) xline(2004, lwidth(vvvthin) lcolor(black) lpattern(solid)) ///
    legend(fcolor(none) lcolor(none) region(lcolor(none))) scheme(s1mono)

* Export the figure in the three formats, in the order pdf, png, eps
foreach fmt in pdf png eps {
    graph export "$results\Figure1.`fmt'", as(`fmt') replace
}

****************************************************************************************************
*Table 3.  Main Regression Results: Effect of eliminating the RPU on the probability of registration
* Results are exported as table1 in the code
* Code includes robustness checks (commented out) suggested by reviewers
* Results are also included in Table A1 in Online Appendix
*****************************************************************************************************
 
* Standard errors are clustered on the district ID throughout
global cluster "IDDIST"

* Control set for the regressions. This replaces the earlier contents of global X.
global X age_head female secondary postsecondary educlevel_head_missing married
global X $X hh_size lincome_family_pc
global X $X invasion lot_size time_habita
global X $X coast altitud idh alfabetismo

* Predictors of registration
* Controls and a linear year term only, first spell year
regress registry $X year if filter & time==1, cluster(${cluster})
outreg2 using "$results\appendix1", excel replace

* Table 3, Part I: Main regression in first year
************************************************
regress registry after_t area1 treated_t ib1.strata $X year if filter & time==1, cluster(${cluster})
* table1 keeps only the treated_t coefficient; appendix1 gets the full model
outreg2 treated_t using "$results\table1", excel replace
outreg2 using "$results\appendix1", excel append

* Predictions for the rows with after_t==1 and treated_t==1, first with
* treated_t set to 1 and then with treated_t set to 0
foreach tv in 1 0 {
    margins if after_t==1 & treated_t==1, at(treated_t=`tv')
}
margins, dydx(treated_t) 

* Flag the estimation sample of the main first-year regression
capture drop sample1y
gen sample1y = e(sample)==1

* Work on that sample only; the matching restore follows the placebo test below
preserve
keep if sample1y==1

/*Robustness check 1: Aggregating the analysis at the district-year-level (per reviewer-suggestion; table not included in the paper)
************************************************************************************************************************************
tab strata, gen(stratacat)
#delimit ;
global X2 "
stratacat1 stratacat2 stratacat3 stratacat4 stratacat5
age_head female secondary postsecondary educlevel_head_missing married 
hh_size lincome_family_pc invasion lot_size time_habita coast altitud idh alfabetismo
";
#delimit cr;


foreach var of varlist registry after_t area1 treated_t {
bysort ${cluster} year: egen m_`var'=mean(`var')
}

foreach var of global X2 {
bysort ${cluster} year: egen m_`var'=mean(`var')
}

bysort ${cluster} year: gen size=_N
gen cw=size^0.5
bysort ${cluster} year: gen last=_n==_N

reg m_registry after_t area1 treated_t m_stratacat1- m_alfabetismo year [aw=cw] if last==1,  cluster(${cluster})
***************************************************************************************************************/

*Placebo test (keep only observations before 2004 and treat 2003 as the year of the change)
drop if year>=2004

* Rebuild the period and treatment indicators around the placebo year
capture drop after_t
gen after_t = year==2003
capture drop treated_t
gen treated_t = area1*after_t

* Same specification as the main first-year regression
regress registry after_t area1 treated_t ib1.strata $X year if filter & time==1, cluster(${cluster})

* Bring back the data as they were at the preceding preserve
restore

/* Robustness Check 2: Using blocked bootstrap for robustness in inference (per reviewer-suggestion; table not included in the paper)
**************************************************************************************************************************************
preserve
keep if sample1y==1
bootstrap _b[treated_t], cluster(IDDIST) idcluster(bsid) reps(1000) seed(12345): reg registry after_t area1 treated_t ib1.strata $X year
restore
*********************************************************************************/

* Table 3, Part II: Main regression in all years after a transaction
*********************************************************************
* Indicator for spell years after the first one (missing when time is missing)
gen timeafter = cond(time==., ., time>1)
* Treatment interacted with the later-years indicator
gen treated_t_after = treated_t*timeafter

* All spell years: the period, area and treatment terms are each allowed to
* differ after the first year; spell-year dummies are interacted with strata
regress registry after_t area1 after_t#timeafter area1#timeafter treated_t treated_t_after ib6.time##ib1.strata $X year if filter, cluster(${cluster})
* table1 keeps the two treatment coefficients; appendix1 gets the full model
outreg2 treated_t treated_t_after using "$results\table1", excel append
outreg2 using "$results\appendix1", excel append

* Flag the estimation sample of the all-years regression
capture drop sample_all_y
gen sample_all_y = e(sample)==1

* Sum of the two treatment coefficients
lincom treated_t+treated_t_after

* Work on that sample only; the matching restore follows the placebo test below
preserve
keep if sample_all_y==1

/* Robustness check 1: Aggregating the analysis at the district-year-time since date of the transaction (per reviewer-suggestion; table not included in the paper)
******************************************************************************************************************************************************************
tab strata, gen(stratacat)
#delimit ;
global X2 "
stratacat1 stratacat2 stratacat3 stratacat4 stratacat5
age_head female secondary postsecondary educlevel_head_missing married 
hh_size lincome_family_pc invasion lot_size time_habita coast altitud idh alfabetismo
";
#delimit cr;


foreach var of varlist registry after_t area1 timeafter treated_t treated_t_after {
bysort ${cluster} year time: egen m_`var'=mean(`var')
}

foreach var of global X2 {
bysort ${cluster} year time: egen m_`var'=mean(`var')
}

bysort ${cluster} year time: gen size=_N
gen cw=size^0.5
bysort ${cluster} year time: gen last=_n==_N

#delimit ;
reg m_registry after_t area1 after_t#timeafter area1#timeafter treated_t treated_t_after 
ib6.time##c.m_stratacat1 ib6.time##c.m_stratacat2 ib6.time##c.m_stratacat3 ib6.time##c.m_stratacat4 ib6.time##c.m_stratacat5
m_age_head- m_alfabetismo year [pw=cw] if last==1, cluster(${cluster});
#delimit cr;
lincom treated_t+treated_t_after
*/

*Placebo test: keep only observations before 2004 and treat 2003 as the year of the change
drop if year>=2004

* Rebuild the period and treatment indicators around the placebo year
capture drop after_t
gen after_t = year==2003
capture drop treated_t
gen treated_t = area1*after_t
capture drop treated_t_after
gen treated_t_after = treated_t*timeafter

* Same specification as the main all-years regression
regress registry after_t area1 after_t#timeafter area1#timeafter treated_t treated_t_after ib6.time##ib1.strata $X year if filter, cluster(${cluster})
lincom treated_t+treated_t_after

* Bring back the data as they were at the preceding preserve
restore

/* Robustness Check 2: Using blocked bootstrap for robustness in inference (per reviewer-suggestion; table not included in the paper)
***************************************************************************************************************************************
preserve
keep if sample_all_y==1
#delimit ;
bootstrap _b[treated_t] _b[treated_t_after] suma=(_b[treated_t]+_b[treated_t_after]), cluster(IDDIST) idcluster(bsid) reps(1000) seed(12345): 
reg registry after_t area1 after_t#timeafter area1#timeafter treated_t treated_t_after 
ib6.time##ib1.strata $X year;
#delimit cr;

restore
***************************************************************************/

*********************************************************************************
*Figure 3.  Predicted factual and counterfactual probabilities of registration 
*********************************************************************************

* Interactions with timeafter written as plain product variables
gen after_t_timeafter = after_t*timeafter
gen area1_timeafter   = area1*timeafter

regress registry after_t area1 after_t_timeafter area1_timeafter treated_t treated_t_after ib6.time##ib1.strata $X year if filter, cluster(${cluster})
estimates store allyears

* Save the estimation sample to disk without changing the data in memory
preserve
keep if e(sample)
save "$results\dataFFunction", replace
restore

*Estimated factual probabilities of registration, by spell year since the transaction
* margins posts one predicted value per value of time, for the rows with
* area1==1 and after_t==1. Each nlcom then reports one minus the running
* product of (1 - predicted value), for time 1, then times 1-2, up to times 1-6.
estimates restore allyears
margins if area1==1 & after_t==1, over(time) post
local surv "(1-_b[1.time])"
nlcom 1-`surv'
forvalues t = 2/6 {
    local surv "`surv'*(1-_b[`t'.time])"
    nlcom 1-`surv'
}

*Estimated counterfactual probabilities of registration, by spell year since the transaction
* Keep copies of the treatment variables, then set both to zero for all rows
capture drop treated_t_orig
gen treated_t_orig = treated_t
capture drop treated_t_after_orig
gen treated_t_after_orig = treated_t_after
replace treated_t = 0
replace treated_t_after = 0

* Same predictions and cumulative quantities, now with the treatment set to zero
estimates restore allyears
margins if area1==1 & after_t==1, over(time) post
local surv "(1-_b[1.time])"
nlcom 1-`surv'
forvalues t = 2/6 {
    local surv "`surv'*(1-_b[`t'.time])"
    nlcom 1-`surv'
}

* Put the original treatment values back
replace treated_t = treated_t_orig
replace treated_t_after = treated_t_after_orig

*The numbers above are the ones included in Figure 3 in the paper


*********************************************************************************
*Figure 4.  Yearly estimated differences in the registration probability 
*(first year after transaction) 
*********************************************************************************

* One treated-area-by-year indicator for each year from 2001 to 2010
forvalues y = 2001/2010 {
    capture drop treated_`y'
    gen treated_`y' = area1==1 & year==`y'
}

* First-year regression with the yearly indicators in place of treated_t
regress registry after_t area1 treated_2* ib1.strata $X year if filter & time==1, cluster(${cluster})

* Holder variables for the plot: year, estimate, standard error, 95% bounds
foreach v in beta bse lci uci y {
    capture drop `v'
}
foreach v in beta bse lci uci y {
    gen `v' = .
}

* t critical value for a two-sided 95% interval with the regression's residual df
tempname tcrit
scalar `tcrit' = invt(e(df_r), 0.975)

* Store the result for each year in rows 1 to 10 of the dataset
local row = 0
forvalues y = 2001/2010 {
    local ++row
    display "Year: `y'"
    replace y    = `y' in `row'
    replace beta = _b[treated_`y'] in `row'
    replace bse  = _se[treated_`y'] in `row'
    replace lci  = _b[treated_`y'] - `tcrit'*_se[treated_`y'] in `row'
    replace uci  = _b[treated_`y'] + `tcrit'*_se[treated_`y'] in `row'
}
* Row counter global, left one past the last filled row
global c = `row' + 1

* Point estimates with confidence bars by year, and a dashed reference line at zero
eclplot beta lci  uci y, scheme(s1mono)  ytitle(Diff-in-diff estimates and 95% CI) ///
    yline(0, lwidth(vvvthin) lpattern(dash) lcolor(black)) xtitle(Year) ///
    legend(off)

* Export the figure in the three formats, in the order pdf, png, eps
foreach fmt in pdf png eps {
    graph export "$results\Figure4.`fmt'", as(`fmt') replace
}


*********************************************************************************
*Table 4.  Effects on the probability of registering in first year by subgroups
*********************************************************************************
*By Income
* Quartiles of per-capita family income. e(sample) here is the estimation
* sample of whichever regression ran last before this line (the Figure 4
* regression when the file is run from top to bottom).
capture drop income_cat
xtile income_cat = income_family_pc if e(sample), n(4)

* Controls without the income variable, which now defines the subgroups
global X2 age_head female secondary postsecondary educlevel_head_missing married
global X2 $X2 hh_size
global X2 $X2 invasion lot_size time_habita
global X2 $X2 coast altitud idh alfabetismo

* Period, area and treatment terms are each fully interacted with the income quartile
regress registry after_t##income_cat area1##income_cat treated_t##income_cat ib1.strata $X2 year if filter & time==1, cluster(${cluster})

* Predictions by quartile for the rows with after_t==1 and treated_t==1,
* first with treated_t set to 1 and then with treated_t set to 0
foreach tv in 1 0 {
    margins if after_t==1 & treated_t==1, over(income_cat) at(treated_t=`tv')
}

* Effect of treated_t by quartile, posted so that outreg2 can export it
margins, dydx(treated_t) over(income_cat) post
outreg2 using "$results\het", excel replace

*By Education
* Controls without the education dummies, since edu_cat now defines the subgroups
global X2 age_head female married
global X2 $X2 hh_size lincome_family_pc
global X2 $X2 invasion lot_size time_habita
global X2 $X2 coast altitud idh alfabetismo

* Period, area and treatment terms are each fully interacted with the education category
regress registry after_t##edu_cat area1##edu_cat treated_t##edu_cat ib1.strata $X2 year if filter & time==1, cluster(${cluster})

* Predictions by category for the rows with after_t==1 and treated_t==1,
* first with treated_t set to 1 and then with treated_t set to 0
foreach tv in 1 0 {
    margins if after_t==1 & treated_t==1, over(edu_cat) at(treated_t=`tv')
}

* Effect of treated_t by category, posted so that outreg2 can export it
margins, dydx(treated_t) over(edu_cat) post
outreg2 using "$results\het", excel append

*By Transaction Type
* Period, area and treatment terms are each fully interacted with the
* transaction stratum; the controls are the ones held in global X
regress registry after_t##strata area1##strata treated_t##strata $X year if filter & time==1, cluster(${cluster})

* Predictions by stratum for the rows with after_t==1 and treated_t==1,
* first with treated_t set to 1 and then with treated_t set to 0
foreach tv in 1 0 {
    margins if after_t==1 & treated_t==1, over(strata) at(treated_t=`tv')
}

* Effect of treated_t by stratum, posted so that outreg2 can export it
margins, dydx(treated_t) over(strata) post
outreg2 using "$results\het", excel append

* Close the log and end the do-file
capture log close
exit
